home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Tech Arsenal 1
/
Tech Arsenal (Arsenal Computer).ISO
/
tek-04
/
ddj0492.zip
/
STRING.ASC
< prev
next >
Wrap
Text File
|
1992-03-10
|
7KB
|
202 lines
_FINDING STRING DISTANCES_
by Ray Valdes
[LISTING ONE]
/***********************************************************************
LEVDIST.C -- Computing the Levenshtein distance (string-to-string edit)
by Ray Valdes, DDJ April 92
***********************************************************************/
#define TRUE 1
#define FALSE 0
#define private static
#define public /**/
typedef int bool;
private bool verbose_mode = TRUE;
typedef enum { MATCH, INS, DEL, SUB } opcode;
typedef struct
{ int cost;
char* name;
int delta_row;
int delta_col;
} operation;
#define COST(op) (optable[(int)op].cost) // for convenience
#define OPSTR(op) (optable[(int)op].name) // for convenience
private operation optable[] = //costs defined on a per-op basis
{
/*--cost, name, delta_row, delta_col---------------------------------*/
{ 0, "EQU", -1, -1}, /* a match or no-op backtracks to NorthWest */
{ 1, "INS", 0, -1}, /* insert op backtrack to the West */
{ 1, "DEL", -1, 0}, /* delete op backstracks to the North */
{ 1, "SUB", -1, -1}, /* substitution op backtracks to NorthWest */
};
typedef struct
{ int distance;
opcode op;
} matrix_cell;
#define NUM_ROWS 64
#define NUM_COLS NUM_ROWS
#define SIZEOF_STRING NUM_ROWS
private char A [SIZEOF_STRING];
private char B [SIZEOF_STRING];
private matrix_cell M [NUM_ROWS] [NUM_COLS]; // this is The Matrix
#define DIST(i,j) (M [(i)][(j)].distance) // for convenience
/****************************************************************/
private void say_hello (void);
private bool get_strings (void);
private void initialize_matrix (void);
private void calculate_cell (int row,int col);
private void print_cell (int row,int col);
private void calculate_matrix (int num_rows,int num_cols);
private void backtrack_matrix (int num_rows,int num_cols);
/****************************************************************/
public int
main(int argc,char **argv)
{ say_hello();
while(get_strings())
{ initialize_matrix();
calculate_matrix(strlen(A),strlen(B));
backtrack_matrix(strlen(A),strlen(B));
}
return 0;
}
/****************************************************************/
private void
say_hello(void)
{ if(verbose_mode) printf("\nLevenshtein distance, V1.0");
}
/****************************************************************/
private bool
get_strings(void)
{ char buffer[SIZEOF_STRING*3]; //arbitrarily big buffer
printf("\nEnter first string > "); gets(buffer);
if(buffer[0]=='\0') return FALSE;
strcpy(A,buffer);
printf("\nEnter second string > "); gets(buffer);
if(buffer[0]=='\0') return FALSE;
strcpy(B,buffer);
return TRUE;
}
/****************************************************************/
private void
initialize_matrix(void)
{ int row,col;
for(row=0,col=0; col<NUM_COLS; col++) // initialize the first row
{
M [row][col].distance = col;
M [row][col].op = INS;
}
for(row=0,col=0; row<NUM_ROWS; row++) // initialize the first column
{
M [row][col].distance = row;
M [row][col].op = DEL;
}
}
/****************************************************************/
private void
calculate_cell(int row,int col)
{ int dNorthWest = DIST(row-1, col-1);
int dWest = DIST(row , col-1);
int dNorth = DIST(row-1, col );
if(dWest < dNorth)
{ if(dWest < dNorthWest)
{ M [row][col].op = INS;
M [row][col].distance = dWest + COST(INS);
}
else // dNorthWest <= dWest < dNorth
{ opcode op;
op = ( A[row-1]==B[col-1] ) ? MATCH : SUB;
M [row][col].op = op;
M [row][col].distance = dNorthWest + COST(op);
}
}
else // dNorth <= dWest
{ if(dNorth < dNorthWest)
{ M [row][col].op = DEL;
M [row][col].distance = dNorth + COST(DEL);
}
else // dNorthWest <= dNorth <= dWest
{ opcode op;
op = ( A[row-1]==B[col-1] ) ? MATCH : SUB;
M [row][col].op = op;
M [row][col].distance = dNorthWest + COST(op);
}
}
}
/****************************************************************/
private void
calculate_matrix(int num_rows,int num_cols)
{ int row,col;
if(verbose_mode)
{ printf("\n\\\n \\COL |");
for(col=0; col < num_cols+1; col++)
printf("____%d____|");
printf("\nROW \\ | |");
for(col=1; col < num_cols+1; col++)
printf(" %c |", B [col-1]);
printf("\n 0 \\|");
for(row=0,col=0; col < num_cols+1; col++)
print_cell(row,col);
}
for(row=1; row<num_rows+1; row++)
{ if(verbose_mode) printf("\n% 2d %c |",row, A [row-1]);
print_cell(row,0);
for(col=1; col<num_cols+1; col++)
{
calculate_cell(row,col);
if(verbose_mode) print_cell(row,col);
}
}
}
/****************************************************************/
private void
print_cell(int row,int col)
{ printf(" %d %s ",DIST(row,col),OPSTR( M [row][col].op));
}
/****************************************************************/
private void
backtrack_matrix(int num_rows,int num_cols)
{ int dx,dy;
int i,j;
int row = num_rows;
int col = num_cols;
printf("\n\nLevenshtein distance is %d.\n",DIST(row,col));
printf("\nBacktrace: %s",A);
while(row>0 || col>0)
{ if( ( M [row][col].op != MATCH) && verbose_mode)
{ printf("\nD(%d,%d)=%d ", row,col,DIST(row,col));
printf("%s --> ",OPSTR( M [row][col].op));
for(i=1;i<row-(M[row][col].op==DEL?1:0);i++)
printf("%c", A[i-1]);
/*printf("_");*/
for(j=col-(M[row][col].op==INS?1:0);j<num_cols+1;j++)
printf("%c", B[j-1]);
}
dy = optable[(int)( M [row][col].op)].delta_row;
dx = optable[(int)( M [row][col].op)].delta_col;
row += dy;
col += dx;
}
}